#  Look at the binomial probability distribution

################################################
## Important Note:  Students are not expected ##
## to be able to come up with the ideas and   ##
## programming examples demonstrated on this  ##
## script.                                    ##
################################################
#
#  In the binomial situation we have:
#   a) a fixed number of trials 
#   b) each trial has exactly two possible outcomes,
#          a success or a failure
#   c) the probability of success is the same for
#          each trial
#   d) the trials are independent
#   e) we are looking at a random variable that
#         is the number of successes in those
#         trials 

# Look at a case where we have 5 trials
# How many ways could we get 2 successes in those
#    five trials (positions noted above items)
# 12345    12345   12345   12345
# SFFFS    FSFFS   FFSFS   FFFSS
# SFFSF    FSFSF   FFSSF
# SFSFF    FSSFF
# SSFFF
#  so there are 10 ways.
#  In the list of possible ways of getting
#  2 successes in 5 trials note the position
#  of the successes.  Translating those positions
#  into a new list gives us
#  1 2        2 3        3 4       4 5
#  1 3        2 4        3 5
#  1 4        2 5 
#  1 5
#  We could see these same values by using
#  The combinations function, but we need to 
#  install that.
install.packages("gtools")
library(gtools)
combinations(5,2,c(1,2,3,4,5))
#
#  Again, we note that there are 10 different
#  items that represent the ways we could get
#  2 successes in 5 trials.  That number of
#  different ways is the number of combinations 
#  of 5 things take 2 at a time. the function
#  combinations(5,2,c(1,2,3,4,5)) lists all of
#  them, but we just need to know the number 
#  of them.  For that we could use nCr(5,2)
#  or num_comb(5,2).
source("../combinations.R")
nCr(5,2)  # is one alternative
num_comb(5,2) # is a different alternative

#  Now, if the P(success) = p then
#              P(failure) = (1-p)
#  Next note that the probability of getting any 
#  one of the items with 2 successes and 3 
#  failures is  p^2 * (1-p)^3

#  Therefore, the probability of getting any of
#  the 10 items is 10*p^2*(1-p)^3
#
#  Or, more generally, the probability of getting
#  exactly r successes in n trials when the
#  probability of success on one trial is p is 
#  given by nCr(n,r)*p^r * (1-p)^(n-r)

# So, the long way to find the probability of 
# getting exactly 4 successes in 10 trials when
# the probability of getting a success in one
# trial is 0.45 is given by

nCr(10,4)*0.45^4 * (1-0.45)^(10-4)
#
#  We have a function that will compute this
source("../pbinomeq.R")
pbinomeq(4,10, 0.45 )

#  Here are a few more examples
pbinomeq( 0, 10, 0.45)  # P( 0 successes )
pbinomeq( 1, 10, 0.45)  # P( 1 success )
pbinomeq( 2, 10, 0.45)  # P( 2 successes )
pbinomeq( 3, 10, 0.45)  # P( 3 successes )
###############################################
##  Stop here and compare these results to   ##
##  the values shown in the binomial         ##
##  distribution table for 10 trials that is ##
##  on the web page.                         ##
###############################################

# This is all well and good, but the usual question
# in this area is to find the probability of
# getting 3 or fewer successes in 10 trials where
# the probability of a success on one trial is 0.45.
#
#  We could just add up the last four results as in
#  P(X=0) + P(X=1) + P(X=2) + P(X=3) = P(X <= 3)
#
pbinomeq( 0, 10, 0.45) +
  pbinomeq( 1, 10, 0.45) +
  pbinomeq( 2, 10, 0.45) +
  pbinomeq( 3, 10, 0.45)
#
#  In the old days we would look this up in a table
#  for the cumulative probabilities in binomial
#  settings.  (see the web page)
###############################################
##  Stop here and compare these results to   ##
##  the values shown in the table on the     ##
##  web page.                                ##
###############################################

# R has a built-in function, pbinom(), that computes
# the cumulative probability for a binomial
# situation. So, P(X<=3) for 10 trials when the
# probability of success on a single trial is 0.45
# is
pbinom( 3, 10, 0.45)

#  Of course, now that we have pbinom() we really
#  do not need pbinomeq because for 13 trials with
#  the probability of a single success being 0.423
#  we can find P(X=6) by 
pbinom(6,13,0.423) - pbinom(5,13,0.423)
#  Why did that work?????????????????
# let us compare that to 
pbinomeq( 6, 13, 0.423 )
#  could we look this up on the table?
#  In fact, look it up on both tables

           ##### pause to do the look ups

########################################
##   Look at a whole range of problems that we 
##   can now do using pbinom()

##  For all of these we will look at 13 trials 
##  and a probability of success on a single
##  trial will be 0.393

# Find the probability of getting 7 or fewer successes
pbinom( 7, 13, 0.393)

# Find the probability of getting fewer than 4 successes
pbinom( 3,13, 0.393)

# Find the probability of getting more than 8 successes
1 - pbinom( 8, 13, 0.393 )

# Find the probability of getting between 5 and 8, 
#  inclusive, successes
pbinom( 8, 13, 0.393 ) # is P(X<=8)
pbinom( 4, 13, 0.393 ) # is P(X<=4)
# so the difference is P( 5 <= X <= 8)
pbinom( 8, 13, 0.393 ) - pbinom( 4, 13, 0.393 )

# Find the probability of getting less than 4 or
#     more than 10 successes
pbinom( 3, 13, 0.393 ) + (1 - pbinom(10,13,0.393))

############  here is one new feature
#    pbinom( 10,13,0.393) is P( X <= 10 )
#but pbinom( 10,13,0.393, lower.tail=FALSE) is
#                            P( X > 10 )
#   Therefore the previous problem could have been
#    answered with
pbinom( 3, 13, 0.393 ) + 
     pbinom( 10, 13, 0.393, lower.tail=FALSE )

############################################
##   Look at the measures of the binomial ##
##   distribution                         ##
############################################

# find the  mean  of a binomial distribution
#    where we have 17 trials and the probability
#    of success on a single trial is 0.2941
#
#    The mean turns out to be   n*p
17*0.2941  # is the mean
#
#   We can verify this by applying the law of
#   large numbers.
#   Create a huge listing of outcomes from 
#   0 to 17 where each value happens at the
#   probability of getting that many successes out
#   of 17 trials.
#   We will get about 10000 outcomes
outcomes <- NULL
for (i in 0:17 ) {
  expected_num <- pbinomeq(i,17,0.2941)*10000
  expected_num <- round(expected_num,0)
  outcomes <- c(outcomes, rep(i,expected_num))
}
# look at how many outcomes of each possible
# number of successes we have
table( outcomes )
# now find the mean of those outcomes
mean( outcomes )

 
# Now find the variance and the standard deviation
# of the same binomial distribution.
#   The variance is  n*p*(1-p)
17*0.2941*(1-0.2941)
#   The standard deviation is sqrt(n*p*(1-p))
sqrt(17*0.2941*(1-0.2941))
#
#   Compare those to our huge list of outcomes
source("../pop_sd.R")
pop_sd( outcomes )  # is the standard deviation
pop_sd( outcomes )^2  # is the variance